package com.shujia.spark

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Demonstrates the RDD `sample` transformation: reads the students file as
  * lines of text, draws a random sample of those lines, prints every sampled
  * line and finally prints how many lines were sampled.
  */
object Demo7Sample {

  /**
    * Program entry point.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf()
      .setAppName("make")
      .setMaster("local")

    val sc = new SparkContext(conf)

    // Always release the context, even when the job throws, so the
    // SparkContext is not left running after main returns.
    try {
      val studentsRDD: RDD[String] = sc.textFile("spark/data/students.txt")

      /**
        * The sample operator draws a random sample and needs a fraction
        * (ratio) argument.
        *
        * Sampling here is done with replacement, so a line may appear more
        * than once in the result. The fraction is an expectation, not an
        * exact size: the number of sampled lines is only approximately
        * fraction * (number of input lines).
        */
      val sampleRDD: RDD[String] = studentsRDD
        .sample(withReplacement = true, fraction = 0.1)
        // Two actions (foreach and count) run on this RDD below; caching
        // avoids re-reading the file and re-sampling for the second one.
        .cache()

      sampleRDD.foreach(println)

      println(sampleRDD.count())
    } finally {
      sc.stop()
    }
  }

}
